# Libraries
# Data: rodent species presence/absence vs. habitat (Bolger.csv)
db<-read.csv("C:\\Users\\Montero\\Desktop\\Escritorio 2024\\Betametrica\\MODULO IV\\Bolger.csv")
# NOTE(review): attach() is discouraged (it masks names and creates hidden
# global state), but the ROC() calls further down resolve RODENTSP/PERSHRUB/
# DISTX/AGE through it, so it cannot be dropped without editing those calls.
attach(db)
# Logit vs. probit comparison: first, the logit specification.
# Response: RODENTSP (binary); predictors: shrub cover, distance, fragment age.
logit <- glm(RODENTSP ~ PERSHRUB + DISTX + AGE,
             family = binomial(link = "logit"),
             data = db)
summary(logit)
##
## Call:
## glm(formula = RODENTSP ~ PERSHRUB + DISTX + AGE, family = binomial(link = "logit"),
## data = db)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.9099159 3.1125426 -1.899 0.0576 .
## PERSHRUB 0.0958695 0.0406119 2.361 0.0182 *
## DISTX 0.0003087 0.0007741 0.399 0.6900
## AGE 0.0250077 0.0376618 0.664 0.5067
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 34.617 on 24 degrees of freedom
## Residual deviance: 19.358 on 21 degrees of freedom
## AIC: 27.358
##
## Number of Fisher Scoring iterations: 5
# Same specification with a probit link for comparison.
probit <- glm(RODENTSP ~ PERSHRUB + DISTX + AGE,
              family = binomial(link = "probit"),
              data = db)
summary(probit)
##
## Call:
## glm(formula = RODENTSP ~ PERSHRUB + DISTX + AGE, family = binomial(link = "probit"),
## data = db)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.7370090 1.7574065 -2.126 0.03347 *
## PERSHRUB 0.0592018 0.0220816 2.681 0.00734 **
## DISTX 0.0002038 0.0004366 0.467 0.64059
## AGE 0.0184713 0.0209535 0.882 0.37803
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 34.617 on 24 degrees of freedom
## Residual deviance: 19.131 on 21 degrees of freedom
## AIC: 27.131
##
## Number of Fisher Scoring iterations: 6
# Side-by-side coefficient table for the two models (memisc).
memisc::mtable(logit, probit,
               digits = 6, sdigits = 5,
               summary.stats = TRUE)
##
## Calls:
## logit: glm(formula = RODENTSP ~ PERSHRUB + DISTX + AGE, family = binomial(link = "logit"),
## data = db)
## probit: glm(formula = RODENTSP ~ PERSHRUB + DISTX + AGE, family = binomial(link = "probit"),
## data = db)
##
## ===========================================
## logit probit
## -------------------------------------------
## (Intercept) -5.909916 -3.737009*
## (3.112543) (1.757406)
## PERSHRUB 0.095869* 0.059202**
## (0.040612) (0.022082)
## DISTX 0.000309 0.000204
## (0.000774) (0.000437)
## AGE 0.025008 0.018471
## (0.037662) (0.020953)
## -------------------------------------------
## Log-likelihood -9.67879 -9.56556
## N 25 25
## ===========================================
## Significance: *** = p < 0.001;
## ** = p < 0.01;
## * = p < 0.05
# For the logit model, exp(b) of the coefficients gives the odds ratios.
# Hosmer-Lemeshow goodness-of-fit test (ResourceSelection::hoslem.test).
# BUG FIX: the original call used g = 2 groups, which gives df = g - 2 = 0.
# With zero degrees of freedom the chi-square p-value is meaningless -- the
# previously printed "X-squared = 0.647, df = 0, p-value < 2.2e-16" was an
# artifact of the degenerate test, NOT evidence of lack of fit.
# Use the conventional g = 10 groups (df = 8).
hl1 <- hoslem.test(db$RODENTSP, fitted(logit), g = 10)
hl2 <- hoslem.test(db$RODENTSP, fitted(probit), g = 10)
hl1
hl2
# Interpretation: H0 is "the model fits the data well". Only reject H0
# (lack of fit) for a small p-value. The original conclusion ("reject H0:
# goodness of fit") was based on the degenerate df = 0 output above.
# Classification threshold: the mean of the fitted probabilities.
# This is an ad-hoc choice -- there is no technical criterion behind it.
threshold_logit  <- mean(fitted(logit))
threshold_probit <- mean(fitted(probit))
threshold_logit
## [1] 0.48
threshold_probit
## [1] 0.4857771
# Classification tables below (library QuantPsyc / gmodels).
# WARNING: the threshold is DECISIVE for these tables --
# changing it changes every count.
# Confusion table for the logit model at the chosen threshold
# (gmodels::CrossTable deparses the expressions into the printed header).
CrossTable(db$RODENTSP,fitted(logit)>threshold_logit, expected=FALSE) # can be done with just this line
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 25
##
##
## | fitted(logit) > threshold_logit
## db$RODENTSP | FALSE | TRUE | Row Total |
## -------------|-----------|-----------|-----------|
## 0 | 11 | 2 | 13 |
## | 1.901 | 2.419 | |
## | 0.846 | 0.154 | 0.520 |
## | 0.786 | 0.182 | |
## | 0.440 | 0.080 | |
## -------------|-----------|-----------|-----------|
## 1 | 3 | 9 | 12 |
## | 2.059 | 2.621 | |
## | 0.250 | 0.750 | 0.480 |
## | 0.214 | 0.818 | |
## | 0.120 | 0.360 | |
## -------------|-----------|-----------|-----------|
## Column Total | 14 | 11 | 25 |
## | 0.560 | 0.440 | |
## -------------|-----------|-----------|-----------|
##
##
# QuantPsyc::ClassLog: classification rates + McFadden pseudo-R2 (logit)
ClassLog(logit,db$RODENTSP, cut=threshold_logit)
## $rawtab
## resp
## 0 1
## FALSE 11 3
## TRUE 2 9
##
## $classtab
## resp
## 0 1
## FALSE 0.8461538 0.2500000
## TRUE 0.1538462 0.7500000
##
## $overall
## [1] 0.8
##
## $mcFadden
## [1] 0.4408127
# Same confusion table for the probit model at its own threshold.
CrossTable(db$RODENTSP,fitted(probit)>threshold_probit, expected=FALSE)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 25
##
##
## | fitted(probit) > threshold_probit
## db$RODENTSP | FALSE | TRUE | Row Total |
## -------------|-----------|-----------|-----------|
## 0 | 11 | 2 | 13 |
## | 1.901 | 2.419 | |
## | 0.846 | 0.154 | 0.520 |
## | 0.786 | 0.182 | |
## | 0.440 | 0.080 | |
## -------------|-----------|-----------|-----------|
## 1 | 3 | 9 | 12 |
## | 2.059 | 2.621 | |
## | 0.250 | 0.750 | 0.480 |
## | 0.214 | 0.818 | |
## | 0.120 | 0.360 | |
## -------------|-----------|-----------|-----------|
## Column Total | 14 | 11 | 25 |
## | 0.560 | 0.440 | |
## -------------|-----------|-----------|-----------|
##
##
# Classification rates + McFadden pseudo-R2 (probit)
ClassLog(probit,db$RODENTSP, cut=threshold_probit)
## $rawtab
## resp
## 0 1
## FALSE 11 3
## TRUE 2 9
##
## $classtab
## resp
## 0 1
## FALSE 0.8461538 0.2500000
## TRUE 0.1538462 0.7500000
##
## $overall
## [1] 0.8
##
## $mcFadden
## [1] 0.4473546
# Predictive performance via ROC curves (library ROCR).
pred_logit  <- prediction(logit$fitted.values, db$RODENTSP)
pred_probit <- prediction(probit$fitted.values, db$RODENTSP)
# tpr = true positive rate, fpr = false positive rate
perf_logit  <- performance(pred_logit,  measure = "tpr", x.measure = "fpr")
perf_probit <- performance(pred_probit, measure = "tpr", x.measure = "fpr")
# ROC curves with the 45-degree no-skill reference line.
plot(perf_logit, colorize = TRUE, lty = 3)
abline(0, 1, col = "black")
plot(perf_probit, colorize = TRUE, lty = 3)
abline(0, 1, col = "black")
# Area under the ROC curve: more area = better discrimination.
# BUG FIX 1: removed the stray token `modelo`, which aborted the script
#            with "object 'modelo' not found".
# BUG FIX 2: the probit AUC was computed from pred_logit (copy-paste
#            error), which is why both AUCs printed as 0.8910256.
#            It now uses pred_probit.
aucp_logit <- performance(pred_logit, measure = "auc")  # area under ROC
aucp_logit <- aucp_logit@y.values[[1]]
aucp_logit
## [1] 0.8910256
aucp_probit <- performance(pred_probit, measure = "auc")
aucp_probit <- aucp_probit@y.values[[1]]
aucp_probit
## (re-run: the previously shown 0.8910256 was the logit AUC, see FIX 2)
# Optimal cut-off point (library Epi) -- LOGIT ONLY: Epi::ROC fits a
# binomial/logit model internally from the formula.
# FIX: pass `data = db` explicitly; the original calls only worked because
# attach(db) happened to put RODENTSP/PERSHRUB/DISTX/AGE on the search path.
ROC(form = RODENTSP ~ PERSHRUB + DISTX + AGE, data = db, plot = "ROC")
# The optimal cut-off is where sensitivity and specificity intersect.
ROC(form = RODENTSP ~ PERSHRUB + DISTX + AGE, data = db, plot = "sp") # sp = specificity
# ===== PROBIT: sensitivity/specificity per candidate cut-off =====
(perf1 <- performance(pred_probit, "sens", "spec"))
## A performance instance
## 'Specificity' vs. 'Sensitivity' (alpha: 'Cutoff')
## with 26 data points
# Extract the three S4 slots into one data frame (one row per cut-off).
# Note: `mat` and the column names sen/esp/alfa are reused further below.
sen  <- perf1@y.values[[1]]      # sensitivity
esp  <- perf1@x.values[[1]]      # specificity
alfa <- perf1@alpha.values[[1]]  # cut-off value
mat <- data.frame(sen, esp, alfa)
mat
## sen esp alfa
## 1 0.00000000 1.00000000 Inf
## 2 0.08333333 1.00000000 0.98409915
## 3 0.16666667 1.00000000 0.97397108
## 4 0.25000000 1.00000000 0.97170427
## 5 0.33333333 1.00000000 0.96005790
## 6 0.41666667 1.00000000 0.95281624
## 7 0.50000000 1.00000000 0.94905729
## 8 0.58333333 1.00000000 0.93598925
## 9 0.66666667 1.00000000 0.91631191
## 10 0.66666667 0.92307692 0.72112136
## 11 0.75000000 0.92307692 0.60957106
## 12 0.75000000 0.84615385 0.52020615
## 13 0.75000000 0.76923077 0.38845386
## 14 0.75000000 0.69230769 0.37995355
## 15 0.83333333 0.69230769 0.32195291
## 16 0.91666667 0.69230769 0.31077470
## 17 0.91666667 0.61538462 0.31025353
## 18 0.91666667 0.53846154 0.23824176
## 19 0.91666667 0.46153846 0.19660582
## 20 1.00000000 0.46153846 0.11579631
## 21 1.00000000 0.38461538 0.10875966
## 22 1.00000000 0.30769231 0.07960922
## 23 1.00000000 0.23076923 0.07785613
## 24 1.00000000 0.15384615 0.04795703
## 25 1.00000000 0.07692308 0.03983344
## 26 1.00000000 0.00000000 0.03347414
# ===== LOGIT: sensitivity/specificity per candidate cut-off =====
(perf2 <- performance(pred_logit, "sens", "spec"))
## A performance instance
## 'Specificity' vs. 'Sensitivity' (alpha: 'Cutoff')
## with 26 data points
# Same slot extraction as for the probit model; `mat2` is reused below.
sen2  <- perf2@y.values[[1]]      # sensitivity
esp2  <- perf2@x.values[[1]]      # specificity
alfa2 <- perf2@alpha.values[[1]]  # cut-off value
mat2 <- data.frame(sen2, esp2, alfa2)
mat2
## sen2 esp2 alfa2
## 1 0.00000000 1.00000000 Inf
## 2 0.08333333 1.00000000 0.97070655
## 3 0.16666667 1.00000000 0.96358890
## 4 0.25000000 1.00000000 0.95942634
## 5 0.33333333 1.00000000 0.94965637
## 6 0.41666667 1.00000000 0.93711358
## 7 0.50000000 1.00000000 0.93388591
## 8 0.58333333 1.00000000 0.91020726
## 9 0.66666667 1.00000000 0.90074886
## 10 0.66666667 0.92307692 0.71404230
## 11 0.75000000 0.92307692 0.62155265
## 12 0.75000000 0.84615385 0.51434036
## 13 0.75000000 0.76923077 0.39453758
## 14 0.75000000 0.69230769 0.38440837
## 15 0.83333333 0.69230769 0.34126211
## 16 0.91666667 0.69230769 0.32541846
## 17 0.91666667 0.61538462 0.29057568
## 18 0.91666667 0.53846154 0.22286175
## 19 0.91666667 0.46153846 0.16397005
## 20 0.91666667 0.38461538 0.10286861
## 21 1.00000000 0.38461538 0.09491477
## 22 1.00000000 0.30769231 0.09285278
## 23 1.00000000 0.23076923 0.07525586
## 24 1.00000000 0.15384615 0.04855852
## 25 1.00000000 0.07692308 0.04844846
## 26 1.00000000 0.00000000 0.03879793
# Reshape both cut-off tables to long format for plotting the two curves.
library(reshape2)
library(gridExtra)
##
## Adjuntando el paquete: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
m  <- melt(mat,  id = c("alfa"))   # probit table (sen/esp vs cut-off)
m2 <- melt(mat2, id = c("alfa2"))  # logit table
# Sensitivity & specificity vs. cut-off, PROBIT model (m comes from
# perf1/pred_probit).
# FIX 1: the title said "logit", but `m` is the probit table -- the p1/p2
#        titles were swapped.
# FIX 2: `size` for lines is deprecated since ggplot2 3.4.0; use `linewidth`
#        (removes the deprecation warning the original emitted).
p1 <- ggplot(m, aes(alfa, value, group = variable, colour = variable)) +
  geom_line(linewidth = 1.2) +
  labs(title = "punto de corte para probit")
p1
# Sensitivity & specificity vs. cut-off, LOGIT model (m2 comes from
# perf2/pred_logit).
# FIX 1: the title said "probit", but `m2` is the logit table -- the p1/p2
#        titles were swapped.
# FIX 2: `size` -> `linewidth` (deprecated for lines in ggplot2 >= 3.4.0).
p2 <- ggplot(m2, aes(alfa2, value, group = variable, colour = variable)) +
  geom_line(linewidth = 1.2) +
  labs(title = "punto de corte para logit")
p2
# Interactive versions of the cut-off plots and a side-by-side arrangement.
library(plotly)
## Warning: package 'plotly' was built under R version 4.4.3
##
## Adjuntando el paquete: 'plotly'
## The following objects are masked from 'package:memisc':
##
## rename, style
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Interactive (hover/zoom) renderings of p1 and p2.
ggplotly(p1)
ggplotly(p2)
# Static side-by-side layout of both curves.
g1<-grid.arrange(p1,p2,ncol=2)
g1
## TableGrob (1 x 2) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
# Predict the probability of rodent presence for one hypothetical site.
names(db)
## [1] "PERSHRUB" "DISTX" "AGE" "RODENTSP"
# 50% shrub cover, 500 m distance, 20-year-old fragment.
newdata <- data.frame(PERSHRUB = 50, DISTX = 500, AGE = 20, RODENTSP = 1)
newdata
## PERSHRUB DISTX AGE RODENTSP
## 1 50 500 20 1
# type = "response" returns probabilities on the 0-1 scale.
predict(logit, newdata, type = "response")
## 1
## 0.3865283
predict(probit, newdata, type = "response")
## 1
## 0.3799659